#setwd('/afs/inf.ed.ac.uk/user/s17/s1725186/Documents/PhD-Models/FirstPUModel/RMarkdowns')

library(tidyverse) ; library(reshape2) ; library(glue) ; library(plotly) ; library(dendextend)
library(RColorBrewer) ; library(viridis) ; require(gridExtra) ; library(GGally) ; library(ggpubr)
library(expss)
library(polycor)
library(foreach) ; library(doParallel)
library(knitr) ; library(kableExtra)
library(biomaRt)
library(clusterProfiler) ; library(ReactomePA) ; library(DOSE) ; library(org.Hs.eg.db)
library(WGCNA)

SFARI_colour_hue = function(r) {
  pal = c('#FF7631','#FFB100','#E8E328','#8CC83F','#62CCA6','#59B9C9','#b3b3b3','#808080','gray','#d9d9d9')[r]
}

Load preprocessed dataset (preprocessing code in 01_data_preprocessing.Rmd) and clustering (pipeline in 05_WGCNA.Rmd)

# Gandal dataset
load('./../Data/preprocessed_data.RData')
datExpr = datExpr %>% data.frame
DE_info = DE_info %>% data.frame


# GO Neuronal annotations: regex 'neuron' in GO functional annotations and label the genes that make a match as neuronal
GO_annotations = read.csv('./../Data/genes_GO_annotations.csv')
GO_neuronal = GO_annotations %>% filter(grepl('neuron', go_term)) %>% 
              mutate('ID'=as.character(ensembl_gene_id)) %>% 
              dplyr::select(-ensembl_gene_id) %>% distinct(ID) %>%
              mutate('Neuronal'=1)


# SFARI Genes
SFARI_genes = read_csv('./../../../SFARI/Data/SFARI_genes_01-03-2020_w_ensembl_IDs.csv')
SFARI_genes = SFARI_genes[!duplicated(SFARI_genes$ID) & !is.na(SFARI_genes$ID),]


# Clusterings
clusterings = read_csv('./../Data/clusters.csv')


# Update DE_info with SFARI and Neuronal information
genes_info = DE_info %>% mutate('ID'=rownames(.)) %>% left_join(SFARI_genes, by='ID') %>% 
             mutate(`gene-score`=ifelse(is.na(`gene-score`), 'Others', `gene-score`)) %>%
             left_join(GO_neuronal, by='ID') %>% left_join(clusterings, by='ID') %>%
             mutate(Neuronal=ifelse(is.na(Neuronal), 0, Neuronal)) %>%
             mutate(gene.score=ifelse(`gene-score`=='Others' & Neuronal==1, 'Neuronal', `gene-score`), 
                    significant=padj<0.05 & !is.na(padj))

# Add gene symbol
getinfo = c('ensembl_gene_id','external_gene_id')
mart = useMart(biomart='ENSEMBL_MART_ENSEMBL', dataset='hsapiens_gene_ensembl',
               host='feb2014.archive.ensembl.org') ## Gencode v19
gene_names = getBM(attributes=getinfo, filters=c('ensembl_gene_id'), values=genes_info$ID, mart=mart)

genes_info = genes_info %>% left_join(gene_names, by=c('ID'='ensembl_gene_id'))


clustering_selected = 'DynamicHybrid'
genes_info$Module = genes_info[,clustering_selected]

dataset = read.csv(paste0('./../Data/dataset_', clustering_selected, '.csv'))
dataset$Module = dataset[,clustering_selected]

load('./../Data/GSEA.RData')
GSEA_SFARI = enrichment_SFARI

load('./../Data/ORA.RData')
ORA_SFARI = enrichment_SFARI

rm(DE_info, GO_annotations, clusterings, getinfo, mart, dds, enrichment_DGN, enrichment_DO, enrichment_GO,
   enrichment_KEGG, enrichment_Reactome, enrichment_SFARI)


Selecting Top Modules


We have results both from GSEA and ORA to measure the enrichment of SFARI Genes in each module, and they both agree with each other relatively well

SFARI_genes_by_module = c()

for(module in names(GSEA_SFARI)){
  
  GSEA_info = GSEA_SFARI[[module]] %>% dplyr::select(ID, pvalue, p.adjust, NES) %>%
              mutate(pvalue = ifelse(NES>0, pvalue, 1-pvalue), 
                     p.adjust = ifelse(NES>0, p.adjust, 1)) %>%
              dplyr::rename('GSEA_pval' = pvalue, 'GSEA_padj'= p.adjust)
  
  ORA_info = ORA_SFARI[[module]] %>% dplyr::select(ID, pvalue, p.adjust, qvalue, GeneRatio, Count) %>%
             dplyr::rename('ORA_pval' = pvalue, 'ORA_padj' = p.adjust)
  
  module_info = GSEA_info %>% full_join(ORA_info, by = 'ID') %>% add_column(.before = 'ID', Module = module)
  
  SFARI_genes_by_module = rbind(SFARI_genes_by_module, module_info)
}

SFARI_genes_by_module = SFARI_genes_by_module %>% 
                        left_join(dataset %>% dplyr::select(Module, MTcor) %>% 
                                  group_by(Module,MTcor) %>% tally %>% ungroup, by = 'Module') %>%
                        mutate(ORA_pval = ifelse(is.na(ORA_pval), 1, ORA_pval),
                               ORA_padj = ifelse(is.na(ORA_padj), 1, ORA_padj))

plot_data = SFARI_genes_by_module %>% filter(ID=='SFARI')

ggplotly(plot_data %>% ggplot(aes(1-GSEA_pval, 1-ORA_pval, size = n)) + 
         geom_point(color = plot_data$Module, alpha = .7, aes(id=Module)) + 
         geom_smooth(se=FALSE, color = '#CCCCCC') + 
         xlab('GSEA Enrichment') + ylab('ORA Enrichment') + coord_fixed() +
         ggtitle(paste0('Corr = ', round(cor(plot_data$GSEA_pval, plot_data$ORA_pval),2))) +
         theme_minimal() + theme(legend.position = 'none'))



To determine which modules have a statistically significant enrichment in SFARI Genes we can use the adjusted p-values. We used the Bonferroni correction for this.

GSEA identifies 22/55 as significant. This doesn’t make sense

ggplotly(plot_data %>% ggplot(aes(GSEA_padj, ORA_padj, size = n)) + 
         geom_point(color = plot_data$Module, alpha = .7, aes(id=Module)) + 
         geom_smooth(se=FALSE, color = '#CCCCCC') + 
         geom_hline(yintercept = 0.01, color = 'gray', linetype = 'dashed') +
         geom_vline(xintercept = 0.01, color = 'gray', linetype = 'dashed') +
         xlab('GSEA adjusted p-value') + ylab('ORA adjusted p-value') + 
         scale_x_log10(limits = c(min(plot_data$GSEA_padj, plot_data$ORA_padj),1.2)) + 
         scale_y_log10(limits = c(min(plot_data$GSEA_padj, plot_data$ORA_padj),1.2)) +
         ggtitle(paste0('Corr = ',round(cor(plot_data$GSEA_padj, plot_data$ORA_padj),2))) + coord_fixed() +
         theme_minimal() + theme(legend.position = 'none'))
plot_data = plot_data %>% mutate(GSEA_sig = GSEA_padj<0.01, ORA_sig = ORA_padj<0.01) %>%
            apply_labels(GSEA_sig = 'GSEA significant enrichment',
                         ORA_sig = 'ORA significant enrichment')

cro(plot_data$GSEA_sig, list(plot_data$ORA_sig, total()))
 ORA significant enrichment     #Total 
 FALSE   TRUE   
 GSEA significant enrichment 
   FALSE  33   33
   TRUE  18 4   22
   #Total cases  51 4   55



The ‘over-enrichment’ in SFARI Modules in GSEA could be because SFARI Genes have in general higher Module Memberships than the other genes, which would make them cluster at the beginning of the list constantly and would bias the enrichment analysis.

Looking at the plot below, we can see that there is not a uniform distribution of SFARI genes across all quantiles of the Module Membership values, but they instead seem to cluster around Module Membership values with high magnitudes (both positive and negative), so I don’t think the GSEA results for the SFARI genes are valid.

Because of this, I’m going to use the enrichment from the ORA to study the SFARI Genes

quant_data = dataset %>% dplyr::select(ID, contains('MM.')) %>% 
             left_join(genes_info %>% dplyr::select(ID, gene.score), by = 'ID') %>% dplyr::select(-ID) %>%
             melt %>% mutate(quant = cut(value, breaks = quantile(value, probs = seq(0,1,0.05)) %>% 
                                     as.vector, labels = FALSE)) %>%
             group_by(gene.score, quant) %>% tally %>% ungroup %>% ungroup
  
quant_data = quant_data %>% group_by(quant) %>% summarise(N = sum(n)) %>% ungroup %>% 
            left_join(quant_data, by = 'quant') %>% dplyr::select(quant, gene.score, n, N) %>% 
            mutate(p = round(100*n/N,2)) %>% filter(!is.na(quant)) %>%
            mutate(gene.score = factor(gene.score, levels = rev(c('1','2','3','Neuronal','Others'))))

ggplotly(quant_data %>% filter(!gene.score %in% c('Neuronal','Others')) %>% 
         ggplot(aes(quant, p, fill = gene.score)) + geom_bar(stat='identity') + 
         xlab('Module Membership Quantiles') + ylab('% of SFARI Genes in Quantile') +
         ggtitle('Percentage of Genes labelled as SFARI in each Quantile') +
         scale_fill_manual(values = SFARI_colour_hue(r=rev(c(1:3)))) + 
         theme_minimal() + theme(legend.position = 'none'))
rm(quant_data)


Selecting modules with an adjusted p-value below 0.01 using the ORA

ggplotly(plot_data %>% ggplot(aes(MTcor, ORA_padj, size=n)) + 
         geom_point(color=plot_data$Module, alpha=0.5, aes(id=Module)) +
         geom_hline(yintercept = 0.01, color = 'gray', linetype = 'dotted') + 
         xlab('Module-Diagnosis Correlation') + ylab('Corrected p-values') + scale_y_log10() +
         ggtitle('Modules Significantly Enriched in SFARI Genes') +
         theme_minimal() + theme(legend.position = 'none'))
top_modules = plot_data %>% arrange(desc(ORA_padj)) %>% filter(ORA_padj<0.01) %>% pull(Module) %>% as.character

plot_data %>% filter(Module %in% top_modules) %>% arrange(ORA_pval) %>%
              dplyr::select(Module, MTcor, ORA_pval, ORA_padj, qvalue, GeneRatio, Count) %>%
              rename( ORA_pval = 'p-value', ORA_padj = 'Adjusted p-value') %>%
              kable %>% kable_styling(full_width = F)
Module MTcor p-value Adjusted p-value qvalue GeneRatio Count
#44A0FF 0.6175450 0.0000001 0.0000005 0.0000001 59/574 59
#96A900 0.0721036 0.0000001 0.0000006 0.0000002 27/173 27
#00C0BF -0.5573039 0.0000732 0.0003658 0.0000770 34/336 34
#00BADE -0.9002528 0.0011573 0.0057863 0.0024363 101/1504 101
pca = datExpr %>% prcomp

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% 
            left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>%
            dplyr::select(ID, external_gene_id, PC1, PC2, Module, gene.score) %>%
            mutate(ImportantModules = ifelse(Module %in% top_modules, as.character(Module), 'Others')) %>%
            mutate(color = ifelse(ImportantModules=='Others','gray',ImportantModules),
                   alpha = ifelse(ImportantModules=='Others', 0.2, 0.4),
                   gene_id = paste0(ID, ' (', external_gene_id, ')')) %>%
            apply_labels(ImportantModules = 'Top Modules')

plot_data %>% ggplot(aes(PC1, PC2, color=ImportantModules)) + 
              geom_point(alpha=plot_data$alpha, color=plot_data$color, aes(ID=gene_id)) + theme_minimal() +
              xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],2),'%)')) +
              ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],2),'%)')) +
              ggtitle('Genes belonging to the Modules with the highest SFARI Genes Enrichment')

rm(pca)





Identifying representative genes for each Module


Following the WGCNA pipeline, selecting the genes with the highest Module Membership and Gene Significance

Top 20 genes for each module


Ordered by \(\frac{MM+|GS|}{2}\)

There aren’t that many SFARI genes in the top genes of the modules

create_table = function(module){
  top_genes = dataset %>% left_join(genes_info %>% dplyr::select(ID, external_gene_id), by='ID') %>% 
              dplyr::select(ID, external_gene_id, paste0('MM.',gsub('#','',module)), GS, gene.score) %>%
              filter(dataset$Module==module) %>% dplyr::rename('MM' = paste0('MM.',gsub('#','',module))) %>% 
              mutate(Relevance = (MM+abs(GS))/2) %>% arrange(by=-Relevance) %>% top_n(20) %>%
              dplyr::rename('Gene Symbol' = external_gene_id, 'SFARI Score' = gene.score)
  return(top_genes)
}

top_genes = list()
for(i in 1:length(top_modules)) top_genes[[i]] = create_table(top_modules[i])

kable(top_genes[[1]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[1], 
      '  (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[1]][1],2),')')) %>% 
      kable_styling(full_width = F)
Top 20 genes for Module #00BADE (MTcor = -0.9)
Gene Symbol MM GS SFARI Score Relevance
MAPK9 0.8936385 -0.9396129 None 0.9166257
TRIM37 0.8897530 -0.9297290 None 0.9097410
PREPL 0.8974015 -0.9038264 None 0.9006140
NAP1L5 0.8663709 -0.9041442 None 0.8852576
TTBK2 0.8871227 -0.8745765 None 0.8808496
EIF5A2 0.8379288 -0.9113247 None 0.8746267
PRKCE 0.8392500 -0.9082418 None 0.8737459
ATP6V1C1 0.8503040 -0.8959641 None 0.8731340
SCN8A 0.9469981 -0.7886101 1 0.8678041
DIRAS1 0.8365571 -0.8964601 None 0.8665086
ENO2 0.8805819 -0.8473660 None 0.8639740
ATP6V1A 0.8076082 -0.9160321 None 0.8618202
NAPB 0.8935221 -0.8296895 None 0.8616058
KIF3A 0.8715770 -0.8454561 None 0.8585166
RCAN2 0.7836787 -0.9149672 None 0.8493229
MAP7D2 0.8191900 -0.8791508 None 0.8491704
SCN1A 0.8943166 -0.8001031 1 0.8472099
GABRA1 0.9132443 -0.7756802 None 0.8444622
SULT4A1 0.8785958 -0.8102475 None 0.8444216
SNAP25 0.8768766 -0.8096627 3 0.8432696
kable(top_genes[[2]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[2], 
      '  (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[2]][1],2),')')) %>% 
      kable_styling(full_width = F)
Top 20 genes for Module #00C0BF (MTcor = -0.56)
Gene Symbol MM GS SFARI Score Relevance
FN3K 0.8072660 -0.7995855 None 0.8034257
KIF1A 0.8359343 -0.7546420 None 0.7952881
CELSR2 0.8605644 -0.7116319 None 0.7860982
STX1B 0.8044900 -0.7631064 None 0.7837982
EPB41L1 0.8167212 -0.7310976 None 0.7739094
PRKACA 0.8864856 -0.6592289 None 0.7728572
SYN1 0.8761983 -0.6408596 3 0.7585290
CAMTA2 0.8136030 -0.6985069 None 0.7560549
WNK2 0.7562770 -0.7517751 None 0.7540260
MARCH6 0.7939972 -0.6981273 None 0.7460622
GSK3B 0.7631169 -0.7168148 None 0.7399659
CLIP3 0.9407134 -0.5380568 None 0.7393851
NOVA2 0.9276450 -0.5485881 None 0.7381166
GNL1 0.7864546 -0.6860016 None 0.7362281
ANKRD52 0.7958641 -0.6572514 None 0.7265578
SENP2 0.7242951 -0.7250487 None 0.7246719
PDE4A 0.7684484 -0.6776771 None 0.7230628
LPHN1 0.8255258 -0.6132289 None 0.7193774
WDR37 0.7461109 -0.6864702 None 0.7162905
VAMP2 0.8212414 -0.6094253 None 0.7153334
kable(top_genes[[3]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[3], 
      '  (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[3]][1],2),')')) %>% 
      kable_styling(full_width = F)
Top 20 genes for Module #96A900 (MTcor = 0.07)
Gene Symbol MM GS SFARI Score Relevance
SETBP1 0.6969254 0.3905020 1 0.5437137
PEAK1 0.5630434 0.5079839 None 0.5355137
MED13L 0.6912952 0.3769560 1 0.5341256
PRDM2 0.7646423 -0.2516090 None 0.5081256
CNOT4 0.6592482 0.3325335 None 0.4958909
RNF111 0.7428504 -0.2393294 None 0.4910899
PCLO 0.7883514 -0.1873622 None 0.4878568
RC3H1 0.6732527 0.2650879 None 0.4691703
HIVEP3 0.5564658 0.3732487 2 0.4648573
TAB2 0.6952842 0.2276017 None 0.4614430
NFAT5 0.8109297 0.1024823 None 0.4567060
USP34 0.5661090 -0.3428537 None 0.4544813
RFX7 0.7671081 0.1381448 None 0.4526264
ASH1L 0.8106359 -0.0671050 1 0.4388705
PRR14L 0.5804654 0.2966230 None 0.4385442
ATRX 0.7482085 0.1257246 1 0.4369665
TRRAP 0.5285214 0.3413502 3 0.4349358
MIA3 0.6401164 -0.2244813 None 0.4322988
UBN2 0.6460176 0.2180843 2 0.4320510
HOOK3 0.5988875 -0.2632619 None 0.4310747
kable(top_genes[[4]] %>% dplyr::select(-ID), caption=paste0('Top 20 genes for Module ', top_modules[4], 
      '  (MTcor = ', round(dataset$MTcor[dataset$Module == top_modules[4]][1],2),')')) %>% 
      kable_styling(full_width = F)
Top 20 genes for Module #44A0FF (MTcor = 0.62)
Gene Symbol MM GS SFARI Score Relevance
PCF11 0.8429024 0.7312292 None 0.7870658
KMT2C 0.8520729 0.6717670 1 0.7619200
JMJD1C 0.7598589 0.7508695 3 0.7553642
PHF3 0.8223400 0.6783133 1 0.7503266
MED13 0.7104699 0.7786295 1 0.7445497
ZNF594 0.8024167 0.6845153 None 0.7434660
ZC3H11A 0.8143606 0.6526249 3 0.7334928
RANBP2 0.7468053 0.6875198 None 0.7171626
ZNF532 0.7642551 0.6625591 None 0.7134071
NUP153 0.7528033 0.6629534 None 0.7078784
SCAF11 0.8366856 0.5739281 None 0.7053069
NIN 0.7596626 0.6465768 None 0.7031197
ZCCHC6 0.8105208 0.5938393 None 0.7021801
CDK12 0.8247663 0.5690233 None 0.6968948
HNRNPU 0.7258735 0.6576555 1 0.6917645
SMEK1 0.7853711 0.5922045 None 0.6887878
ROCK1 0.8007812 0.5736512 None 0.6872162
EPB41L2 0.7212127 0.6416288 None 0.6814207
SETX 0.8139855 0.5431843 None 0.6785849
DMD 0.8318008 0.5219177 None 0.6768593
rm(create_table, i)

The top genes tend to have a high mean level of expression

pca = datExpr %>% prcomp

ids = c()
for(tg in top_genes) ids = c(ids, tg$ID)

plot_data = data.frame('ID'=rownames(datExpr), 'PC1' = pca$x[,1], 'PC2' = pca$x[,2]) %>%
            left_join(dataset, by='ID') %>% dplyr::select(ID, PC1, PC2, Module, gene.score) %>%
            mutate(color = ifelse(Module %in% top_modules, as.character(Module), 'gray')) %>%
            mutate(alpha = ifelse(color %in% top_modules & ID %in% ids, 1, 0.1))

plot_data %>% ggplot(aes(PC1, PC2)) + geom_point(alpha=plot_data$alpha, color=plot_data$color) + 
              xlab(paste0('PC1 (',round(100*summary(pca)$importance[2,1],2),'%)')) +
              ylab(paste0('PC2 (',round(100*summary(pca)$importance[2,2],2),'%)')) +
              theme_minimal() + ggtitle('Most relevant genes for top Modules')

rm(ids, pca, tg, plot_data)




Enrichment Analysis


Using the package clusterProfiler. Performing Gene Set Enrichment Analysis (GSEA) and Over Representation Analysis (ORA) using the following datasets:

  • Gene Ontology

  • Disease Ontology

  • Disease Gene Network

  • KEGG

  • REACTOME

# GSEA
load('./../Data/GSEA.RData')

# Rename lists
GSEA_GO = enrichment_GO
GSEA_DGN = enrichment_DGN
GSEA_DO = enrichment_DO
GSEA_KEGG = enrichment_KEGG
GSEA_Reactome = enrichment_Reactome
GSEA_SFARI = enrichment_SFARI


# ORA
load('./../Data/ORA.RData')

# Rename lists
ORA_GO = enrichment_GO
ORA_DGN = enrichment_DGN
ORA_DO = enrichment_DO
ORA_KEGG = enrichment_KEGG
ORA_Reactome = enrichment_Reactome
ORA_SFARI = enrichment_SFARI

rm(enrichment_GO, enrichment_DO, enrichment_DGN, enrichment_KEGG, enrichment_Reactome)
compare_methods = function(GSEA_list, ORA_list){
  
  for(top_module in top_modules){
  
    cat(paste0('  \n  \nEnrichments for Module ', top_module, ' (MTcor=',
               round(dataset$MTcor[dataset$Module==top_module][1],2), '):  \n  \n'))
    
    GSEA = GSEA_list[[top_module]]
    ORA = ORA_list[[top_module]]
    
    cat(paste0('GSEA has ', nrow(GSEA), ' enriched terms  \n'))
    cat(paste0('ORA has  ', nrow(ORA), ' enriched terms  \n'))
    cat(paste0(sum(ORA$ID %in% GSEA$ID), ' terms are enriched in both methods  \n  \n'))

    plot_data = GSEA %>% mutate(pval_GSEA = p.adjust) %>% dplyr::select(ID, Description, NES, pval_GSEA) %>%
                inner_join(ORA %>% mutate(pval_ORA = p.adjust) %>% 
                           dplyr::select(ID, pval_ORA, GeneRatio, qvalue), by = 'ID') 
    
    if(nrow(plot_data)>0){
      print(plot_data %>% mutate(pval_mean = pval_ORA + pval_GSEA) %>% 
                          arrange(pval_mean) %>% dplyr::select(-pval_mean) %>% 
            kable %>% kable_styling(full_width = F))
    }
  } 
}


plot_results = function(GSEA_list, ORA_list){
  
  l = htmltools::tagList()

  for(i in 1:length(top_modules)){
    
    GSEA = GSEA_list[[top_modules[i]]]
    ORA = ORA_list[[top_modules[i]]]
    
    plot_data = GSEA %>% mutate(pval_GSEA = p.adjust) %>% dplyr::select(ID, Description, NES, pval_GSEA) %>%
                inner_join(ORA %>% mutate(pval_ORA = p.adjust) %>% dplyr::select(ID, pval_ORA), by = 'ID')
    
    if(nrow(plot_data)>5){
      min_val = min(min(plot_data$pval_GSEA), min(plot_data$pval_ORA))
      max_val = max(max(max(plot_data$pval_GSEA), max(plot_data$pval_ORA)),0.05)
      ggp = ggplotly(plot_data %>% ggplot(aes(pval_GSEA, pval_ORA, color = NES)) + 
                     geom_point(aes(id = Description)) + 
                     geom_vline(xintercept = 0.05, color = 'gray', linetype = 'dotted') + 
                     geom_hline(yintercept = 0.05, color = 'gray', linetype = 'dotted') + 
                     ggtitle(paste0('Enriched terms in common for Module ', top_modules[i])) +
                     scale_x_continuous(limits = c(min_val, max_val)) + 
                     scale_y_continuous(limits = c(min_val, max_val)) + 
                     xlab('Corrected p-value for GSEA') + ylab('Corrected p-value for ORA') +
                     scale_colour_viridis(direction = -1) + theme_minimal() + coord_fixed())
      l[[i]] = ggp
    }
  }
  
  return(l)
}


KEGG

compare_methods(GSEA_KEGG, ORA_KEGG)

Enrichments for Module #00BADE (MTcor=-0.9):

GSEA has 44 enriched terms
ORA has 14 enriched terms
12 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
hsa04020 Calcium signaling pathway 1.997146 0.0043946 0.0001522 37/543 0.0000607
hsa04728 Dopaminergic synapse 2.224843 0.0046546 0.0002936 28/543 0.0000780
hsa04070 Phosphatidylinositol signaling system 1.960017 0.0048200 0.0013863 22/543 0.0002764
hsa05033 Nicotine addiction 2.313572 0.0053252 0.0043829 12/543 0.0006990
hsa04724 Glutamatergic synapse 2.150640 0.0047448 0.0056285 23/543 0.0007481
hsa04720 Long-term potentiation 2.247426 0.0050307 0.0189579 16/543 0.0015512
hsa04750 Inflammatory mediator regulation of TRP channels 1.853197 0.0192749 0.0194522 20/543 0.0015512
hsa04024 cAMP signaling pathway 1.675747 0.0437914 0.0156488 33/543 0.0015512
hsa04713 Circadian entrainment 2.198473 0.0048187 0.0604770 19/543 0.0043843
hsa04360 Axon guidance 1.681080 0.0663946 0.0081992 32/543 0.0009341
hsa04970 Salivary secretion 2.273165 0.0049516 0.0967544 16/543 0.0056301
hsa04929 GnRH secretion 1.942345 0.0354635 0.0988421 14/543 0.0056301

Enrichments for Module #00C0BF (MTcor=-0.56):

GSEA has 46 enriched terms
ORA has 1 enriched terms
1 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
hsa04962 Vasopressin-regulated water reabsorption 2.210918 0.0055392 0.0585734 6/147 0.0553325

Enrichments for Module #96A900 (MTcor=0.07):

GSEA has 24 enriched terms
ORA has 1 enriched terms
0 terms are enriched in both methods

Enrichments for Module #44A0FF (MTcor=0.62):

GSEA has 31 enriched terms
ORA has 2 enriched terms
1 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
hsa05168 Herpes simplex virus 1 infection 2.63523 0.0038342 0 48/212 0


Plots of the results when there are more than 5 terms in common between methods:

plot_results(GSEA_Reactome, ORA_Reactome)


Reactome

compare_methods(GSEA_Reactome, ORA_Reactome)

Enrichments for Module #00BADE (MTcor=-0.9):

GSEA has 81 enriched terms
ORA has 14 enriched terms
12 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
R-HSA-112316 Neuronal System 2.716357 0.0164711 0.0000000 83/813 0.0000000
R-HSA-112315 Transmission across Chemical Synapses 2.633929 0.0175987 0.0002076 48/813 0.0000471
R-HSA-112314 Neurotransmitter receptors and postsynaptic signal transmission 2.564667 0.0183316 0.0000147 42/813 0.0000063
R-HSA-1296071 Potassium Channels 2.258075 0.0201397 0.0000208 27/813 0.0000063
R-HSA-983712 Ion channel transport 1.834103 0.0185571 0.0027016 35/813 0.0003500
R-HSA-1296072 Voltage gated Potassium channels 2.507780 0.0219822 0.0011173 15/813 0.0001688
R-HSA-442755 Activation of NMDA receptors and postsynaptic events 2.464782 0.0203097 0.0158104 21/813 0.0017920
R-HSA-180024 DARPP-32 events 2.563404 0.0227878 0.0378210 10/813 0.0034295
R-HSA-442982 Ras activation upon Ca2+ influx through NMDA receptor 2.357164 0.0231261 0.0489730 9/813 0.0040370
R-HSA-438064 Post NMDA receptor activation events 2.399184 0.0206225 0.0775438 18/813 0.0054087
R-HSA-438066 Unblocking of NMDA receptors, glutamate binding and activation 2.404319 0.0230288 0.0761934 9/813 0.0054087
R-HSA-6794362 Protein-protein interactions at synapses 2.328002 0.0201942 0.0912816 20/813 0.0059122

Enrichments for Module #00C0BF (MTcor=-0.56):

GSEA has 70 enriched terms
ORA has 1 enriched terms
1 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
R-HSA-9022692 Regulation of MECP2 expression and activity 2.22417 0.0233862 0.0045334 7/202 0.0043893

Enrichments for Module #96A900 (MTcor=0.07):

GSEA has 109 enriched terms
ORA has 1 enriched terms
0 terms are enriched in both methods

Enrichments for Module #44A0FF (MTcor=0.62):

GSEA has 147 enriched terms
ORA has 1 enriched terms
0 terms are enriched in both methods


Plots of the results when there are more than 5 terms in common between methods:

plot_results(GSEA_Reactome, ORA_Reactome)


Gene Ontology

compare_methods(GSEA_GO, ORA_GO)

Enrichments for Module #00BADE (MTcor=-0.9):

GSEA has 145 enriched terms
ORA has 96 enriched terms
29 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
GO:0034762 regulation of transmembrane transport 2.141873 0.0696237 0.0000000 91/1239 0.0000000
GO:0034765 regulation of ion transmembrane transport 2.262229 0.0712931 0.0000000 82/1239 0.0000000
GO:0015672 monovalent inorganic cation transport 2.235542 0.0713551 0.0000018 76/1239 0.0000006
GO:0050808 synapse organization 2.192044 0.0720889 0.0001134 68/1239 0.0000118
GO:0099177 regulation of trans-synaptic signaling 2.427900 0.0721720 0.0000732 68/1239 0.0000086
GO:0050804 modulation of chemical synaptic transmission 2.428109 0.0722069 0.0000655 68/1239 0.0000086
GO:0042391 regulation of membrane potential 2.143565 0.0732511 0.0009078 60/1239 0.0000709
GO:0023061 signal release 2.198170 0.0721085 0.0025859 64/1239 0.0001616
GO:1904062 regulation of cation transmembrane transport 2.232521 0.0751080 0.0003572 54/1239 0.0000335
GO:0050890 cognition 2.031060 0.0759452 0.0010708 50/1239 0.0000772
GO:0007611 learning or memory 2.183693 0.0774993 0.0007774 46/1239 0.0000662
GO:0006813 potassium ion transport 2.203148 0.0810243 0.0000027 42/1239 0.0000006
GO:0099504 synaptic vesicle cycle 2.691746 0.0818072 0.0021165 35/1239 0.0001417
GO:0032409 regulation of transporter activity 2.388352 0.0774611 0.0066624 44/1239 0.0003903
GO:0071804 cellular potassium ion transport 2.177835 0.0846153 0.0000656 32/1239 0.0000086
GO:0071805 potassium ion transmembrane transport 2.177835 0.0846153 0.0000656 32/1239 0.0000086
GO:0022898 regulation of transmembrane transporter activity 2.419973 0.0780026 0.0108402 42/1239 0.0005977
GO:0032412 regulation of ion transmembrane transporter activity 2.453577 0.0782320 0.0114878 41/1239 0.0005982
GO:2001257 regulation of cation channel activity 2.556723 0.0822670 0.0155089 32/1239 0.0007185
GO:0035637 multicellular organismal signaling 2.111649 0.0804880 0.0256310 35/1239 0.0010445
GO:0036465 synaptic vesicle recycling 2.537675 0.0936917 0.0160975 16/1239 0.0007185
GO:0048667 cell morphogenesis involved in neuron differentiation 2.057200 0.0696770 0.0417899 71/1239 0.0015744
GO:0006836 neurotransmitter transport 2.495251 0.0779514 0.0713878 40/1239 0.0021584
GO:0048488 synaptic vesicle endocytosis 2.336862 0.0959053 0.0543081 13/1239 0.0018180
GO:0140238 presynaptic endocytosis 2.336862 0.0959053 0.0543081 13/1239 0.0018180
GO:0060078 regulation of postsynaptic membrane potential 2.264422 0.0847285 0.0695810 26/1239 0.0021584
GO:0010959 regulation of metal ion transport 1.675501 0.0739326 0.0895323 52/1239 0.0024682
GO:2000310 regulation of NMDA receptor activity 2.641243 0.0971384 0.0792075 12/1239 0.0023200
GO:0007416 synapse assembly 2.071926 0.0813752 0.0952305 32/1239 0.0025503

Enrichments for Module #00C0BF (MTcor=-0.56):

GSEA has 216 enriched terms
ORA has 11 enriched terms
6 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
GO:0098693 regulation of synaptic vesicle cycle 2.676548 0.0906601 0.0078619 12/293 0.0075490
GO:0051648 vesicle localization 2.401201 0.0807178 0.0420907 18/293 0.0120343
GO:2000300 regulation of synaptic vesicle exocytosis 2.591080 0.0951176 0.0562097 9/293 0.0120343
GO:0032386 regulation of intracellular transport 2.069479 0.0762095 0.0868989 22/293 0.0120343
GO:0017158 regulation of calcium ion-dependent exocytosis 2.594297 0.0924794 0.0709074 10/293 0.0120343
GO:1902803 regulation of synaptic vesicle transport 2.616382 0.0943473 0.0989168 9/293 0.0120343

Enrichments for Module #96A900 (MTcor=0.07):

GSEA has 137 enriched terms
ORA has 10 enriched terms
0 terms are enriched in both methods

Enrichments for Module #44A0FF (MTcor=0.62):

GSEA has 152 enriched terms
ORA has 13 enriched terms
6 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
GO:0033044 regulation of chromosome organization 1.833167 0.0741368 0.0024700 28/471 0.0011782
GO:0016569 covalent chromatin modification 1.858639 0.0706300 0.0137434 33/471 0.0032778
GO:0051053 negative regulation of DNA metabolic process 2.024191 0.0837346 0.0018339 17/471 0.0011782
GO:0002821 positive regulation of adaptive immune response 2.143569 0.0876414 0.0085758 13/471 0.0027271
GO:2001251 negative regulation of chromosome organization 1.962921 0.0841836 0.0272971 15/471 0.0052083
GO:0016570 histone modification 1.823909 0.0709501 0.0548226 31/471 0.0087168


Plots of the results when there are more than 5 terms in common between methods:

plot_results(GSEA_GO, ORA_GO)


Disease Ontology

compare_methods(GSEA_DO, ORA_DO)

Enrichments for Module #00BADE (MTcor=-0.9):

GSEA has 93 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #00C0BF (MTcor=-0.56):

GSEA has 14 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #96A900 (MTcor=0.07):

GSEA has 7 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #44A0FF (MTcor=0.62):

GSEA has 50 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods


Disease Gene Network

compare_methods(GSEA_DGN, ORA_DGN)

Enrichments for Module #00BADE (MTcor=-0.9):

GSEA has 19 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #00C0BF (MTcor=-0.56):

GSEA has 25 enriched terms
ORA has 1 enriched terms
1 terms are enriched in both methods

ID Description NES pval_GSEA pval_ORA GeneRatio qvalue
umls:C3714756 Intellectual Disability 2.139147 0.0439511 0.0001857 28/269 0.0001828

Enrichments for Module #96A900 (MTcor=0.07):

GSEA has 24 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods

Enrichments for Module #44A0FF (MTcor=0.62):

GSEA has 28 enriched terms
ORA has 0 enriched terms
0 terms are enriched in both methods



Session info

sessionInfo()
## R version 3.6.3 (2020-02-29)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
## 
## locale:
##  [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
##  [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
##  [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    parallel  stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] WGCNA_1.69             fastcluster_1.1.25     dynamicTreeCut_1.63-1 
##  [4] org.Hs.eg.db_3.8.2     AnnotationDbi_1.46.1   IRanges_2.18.3        
##  [7] S4Vectors_0.22.1       Biobase_2.44.0         BiocGenerics_0.30.0   
## [10] DOSE_3.10.2            ReactomePA_1.28.0      clusterProfiler_3.12.0
## [13] biomaRt_2.40.5         kableExtra_1.1.0       knitr_1.28            
## [16] doParallel_1.0.15      iterators_1.0.12       foreach_1.5.0         
## [19] polycor_0.7-10         expss_0.10.2           ggpubr_0.2.5          
## [22] magrittr_1.5           GGally_1.5.0           gridExtra_2.3         
## [25] viridis_0.5.1          viridisLite_0.3.0      RColorBrewer_1.1-2    
## [28] dendextend_1.13.4      plotly_4.9.2           glue_1.4.1            
## [31] reshape2_1.4.4         forcats_0.5.0          stringr_1.4.0         
## [34] dplyr_1.0.0            purrr_0.3.4            readr_1.3.1           
## [37] tidyr_1.1.0            tibble_3.0.1           ggplot2_3.3.2         
## [40] tidyverse_1.3.0       
## 
## loaded via a namespace (and not attached):
##   [1] tidyselect_1.1.0            RSQLite_2.2.0              
##   [3] htmlwidgets_1.5.1           grid_3.6.3                 
##   [5] BiocParallel_1.18.1         munsell_0.5.0              
##   [7] codetools_0.2-16            preprocessCore_1.46.0      
##   [9] withr_2.2.0                 colorspace_1.4-1           
##  [11] GOSemSim_2.10.0             highr_0.8                  
##  [13] rstudioapi_0.11             ggsignif_0.6.0             
##  [15] labeling_0.3                urltools_1.7.3             
##  [17] GenomeInfoDbData_1.2.1      polyclip_1.10-0            
##  [19] bit64_0.9-7                 farver_2.0.3               
##  [21] vctrs_0.3.1                 generics_0.0.2             
##  [23] xfun_0.12                   R6_2.4.1                   
##  [25] GenomeInfoDb_1.20.0         graphlayouts_0.7.0         
##  [27] locfit_1.5-9.4              DelayedArray_0.10.0        
##  [29] bitops_1.0-6                reshape_0.8.8              
##  [31] fgsea_1.10.1                gridGraphics_0.5-0         
##  [33] assertthat_0.2.1            scales_1.1.1               
##  [35] ggraph_2.0.3                nnet_7.3-14                
##  [37] enrichplot_1.4.0            gtable_0.3.0               
##  [39] tidygraph_1.2.0             rlang_0.4.6                
##  [41] genefilter_1.66.0           splines_3.6.3              
##  [43] lazyeval_0.2.2              acepack_1.4.1              
##  [45] impute_1.58.0               broom_0.5.5                
##  [47] europepmc_0.4               checkmate_2.0.0            
##  [49] BiocManager_1.30.10         yaml_2.2.1                 
##  [51] modelr_0.1.6                crosstalk_1.1.0.1          
##  [53] backports_1.1.8             qvalue_2.16.0              
##  [55] Hmisc_4.4-0                 tools_3.6.3                
##  [57] ggplotify_0.0.5             ellipsis_0.3.1             
##  [59] ggridges_0.5.2              Rcpp_1.0.4.6               
##  [61] plyr_1.8.6                  base64enc_0.1-3            
##  [63] progress_1.2.2              zlibbioc_1.30.0            
##  [65] RCurl_1.98-1.2              prettyunits_1.1.1          
##  [67] rpart_4.1-15                cowplot_1.0.0              
##  [69] SummarizedExperiment_1.14.1 haven_2.2.0                
##  [71] ggrepel_0.8.2               cluster_2.1.0              
##  [73] fs_1.4.0                    data.table_1.12.8          
##  [75] DO.db_2.9                   triebeard_0.3.0            
##  [77] reprex_0.3.0                reactome.db_1.68.0         
##  [79] matrixStats_0.56.0          xtable_1.8-4               
##  [81] hms_0.5.3                   evaluate_0.14              
##  [83] XML_3.99-0.3                jpeg_0.1-8.1               
##  [85] readxl_1.3.1                compiler_3.6.3             
##  [87] crayon_1.3.4                htmltools_0.4.0            
##  [89] mgcv_1.8-31                 Formula_1.2-3              
##  [91] geneplotter_1.62.0          lubridate_1.7.4            
##  [93] DBI_1.1.0                   tweenr_1.0.1               
##  [95] dbplyr_1.4.2                MASS_7.3-51.6              
##  [97] rappdirs_0.3.1              Matrix_1.2-18              
##  [99] cli_2.0.2                   igraph_1.2.5               
## [101] GenomicRanges_1.36.1        pkgconfig_2.0.3            
## [103] rvcheck_0.1.8               foreign_0.8-76             
## [105] xml2_1.2.5                  annotate_1.62.0            
## [107] webshot_0.5.2               XVector_0.24.0             
## [109] rvest_0.3.5                 digest_0.6.25              
## [111] graph_1.62.0                rmarkdown_2.1              
## [113] cellranger_1.1.0            fastmatch_1.1-0            
## [115] htmlTable_1.13.3            curl_4.3                   
## [117] graphite_1.30.0             lifecycle_0.2.0            
## [119] nlme_3.1-147                jsonlite_1.7.0             
## [121] fansi_0.4.1                 pillar_1.4.4               
## [123] lattice_0.20-41             httr_1.4.1                 
## [125] survival_3.1-12             GO.db_3.8.2                
## [127] UpSetR_1.4.0                png_0.1-7                  
## [129] bit_1.1-15.2                ggforce_0.3.1              
## [131] stringi_1.4.6               blob_1.2.1                 
## [133] DESeq2_1.24.0               latticeExtra_0.6-29        
## [135] memoise_1.1.0